#' ---
#' title: Replication Code for "How To Train Your Stochastic Parrot: Large Language Models for Political Texts" (Ornstein, Blasingame, & Truscott 2024)
#' date: 2024-10-01
#' version: 1.0
#' ---

## Figure 1: Twitter Sentiment Analysis Task --------
source(file = "figure1.R")

## Table 3: Sample of tweets where sentiment is ambiguous absent context --------

# Show four hand-picked tweets next to the label each method assigns them.
# `d` is not created in this script; presumably figure1.R leaves it in the
# workspace -- TODO confirm.
d |>
  filter(
    text %in% c(
      "Way to go SCOTUS!  You really celebrated PRIDE Month.",
      "Happy Monday to everyone except the Supreme Court! Gay people deserve cakes to be made for them too!!!!!!",
      "#SCOTUS reaffirms @realDonaldTrump is not above the law!",
      "Inject Donald Trump’s tax returns directly into my veins. #SCOTUS"
    )
  ) |>
  mutate(
    Tweet = text,
    # Any expert_score other than -1 is displayed as "Positive".
    Authors = ifelse(expert_score == -1, "Negative", "Positive"),
    # Ties between the two columns are displayed as "Positive".
    LLMs = ifelse(Positive < Negative, "Negative", "Positive"),
    # A dictionary score of exactly zero is shown as the literal string "NA".
    Dictionary = case_when(
      dictionary_sentiment == 0 ~ "NA",
      dictionary_sentiment > 0 ~ "Positive",
      dictionary_sentiment < 0 ~ "Negative"
    ),
    `Naive Bayes` = ifelse(prob_positive > 0.5, "Positive", "Negative"),
    # A score of exactly zero is displayed as "Positive".
    TweetNLP = ifelse(tweetnlp_sentiment < 0, "Negative", "Positive"),
    # Keep only the columns created above.
    .keep = "none"
  )



## Figure 2: Political Ad Tone --------------------

# Remove every (non-hidden) object from the workspace before running the
# next figure script.
rm(list = ls())
source(file = "figure2.R")

## Table A4: Sample of ads where GPT-3 and expert classifications diverged ----

# Print three ads, selected by id, with their expert label and GPT-3 label.
# `d` is not created in this script; presumably figure2.R leaves it in the
# workspace -- TODO confirm.
d |>
  filter(ids %in% c(7375, 7311, 7722)) |>
  mutate(
    Text = text,
    # A tone of 5 is displayed as "Negative"; every other value as "Positive".
    `Expert Label` = ifelse(tone == 5, "Negative", "Positive"),
    `GPT-3 Label` = gpt_3_label,
    # Keep only the columns created above.
    .keep = "none"
  )

## Figure 3: Ideology Scaling -------------------

# Remove every (non-hidden) object from the workspace, then run the script.
rm(list = ls())
source(file = "figure3.R")

## Figure 4: Congressional Speech Labels -------------

# Same pattern: clear the workspace, then run the figure script.
rm(list = ls())
source(file = "figure4.R")

## Appendix D: List of Topic Labels -----------------

library(tinytable)

# The 24 most frequent values of `virtue` in d2, most frequent first.
# `d2` is not created in this script; presumably figure4.R leaves it in the
# workspace -- TODO confirm.
most_frequent_labels <- d2 |>
  count(virtue) |>
  arrange(-n) |>
  slice_head(n = 24) |>
  pull(virtue)

# Count rows per label and party, spread the party values into columns
# (label/party combinations that never occur become 0), sort by the `D`
# column in descending order, and write the table to disk.
d2 |>
  filter(virtue %in% most_frequent_labels) |>
  count(virtue, party) |>
  pivot_wider(names_from = 'party',
              values_from = 'n',
              values_fill = 0) |>
  arrange(-D) |>
  tt() |>
  # save_tt() refuses to replace an existing file unless told to, which made
  # a second run of this script stop here once table-d1.txt had been written.
  save_tt('table-d1.txt', overwrite = TRUE)


## Figure A1: Performance of GPT-3 Legacy Models -------------

# Remove every (non-hidden) object from the workspace, then run the script.
rm(list = ls())
source(file = "figure-A1.R")

## Figure A2: Sentence-level correlations for manifesto application ------------

# Same pattern: clear the workspace, then run the figure script.
rm(list = ls())
source(file = "figure-A2.R")

## Table A4: Sentence-level correlations for manifesto application ------------------

# NOTE(review): the ad-tone table earlier in this script is also labelled
# "Table A4"; confirm the numbering against the appendix.

# Sentences on the two policy dimensions that have a recorded coder count.
# `gpt3_sentences` is not created in this script; presumably figure-A2.R
# leaves it in the workspace -- TODO confirm.
scored_sentences <- gpt3_sentences |>
  filter(policy %in% c('Social', 'Economic')) |>
  filter(!is.na(num_coders))

# Correlation between crowd and GPT-3 ideology scores, by policy dimension,
# for (a) every retained sentence and (b) the subset with at least 25 coders.
# The previous version put sentences with fewer than 25 coders in a group
# labelled 'All Sentences', so that row left out the 25+ sentences; it now
# covers all retained sentences, as the label says.
# NOTE(review): the 'All Sentences' figures change as a result -- check them
# against the published table.
bind_rows(
  scored_sentences |>
    mutate(quantity_of_coders = 'All Sentences'),
  scored_sentences |>
    filter(num_coders >= 25) |>
    mutate(quantity_of_coders = 'At Least 25 Crowd-Coders')
) |>
  group_by(quantity_of_coders, policy) |>
  summarize(correlation = cor(crowd_ideology, gpt3_ideology),
            num_sentences = n(),
            # Return an ungrouped result and skip the regrouping message.
            .groups = 'drop') |>
  knitr::kable(format = 'simple')


## Figure B1: Twitter sentiment using Le Mens & Gallego approach ---------

# Remove every (non-hidden) object from the workspace, then run the script.
rm(list = ls())
source(file = "figure-B1.R")

## Figure B2: Manifesto ideology using Le Mens & Gallego approach ----------

# Same pattern: clear the workspace, then run the figure script.
rm(list = ls())
source(file = "figure-B2.R")